Summary

Manipulate the data into two distinct formats for pedagogical reasons.


In [99]:
import os
import pandas as pd

In [100]:
base_dir = os.path.join("/Users", "sbussmann", "Development", "buda", "buda-ratings")
scores_dir = os.path.join(base_dir, "data", "raw", "game_scores")
interim_dir = os.path.join(base_dir, "data", "interim")

In [101]:
# summer club league 2016
league_id = 40264

In [102]:
file_name = "scores_{}.csv".format(league_id)

In [103]:
file_path = os.path.join(scores_dir, file_name)

In [104]:
scores = pd.read_csv(file_path)

In [105]:
scores.head()


Out[105]:
Team A Team B Tourney Qualifying games* divname Score A Score B
0 AHOC Gothrilla -99 4/3 Div 1 15 12
1 AHOC BBN -99 4/3 Div 1 15 1
2 AHOC Stonecutters -99 4/3 Div 1 15 8
3 AHOC FlowChart -99 4/3 Div 1 15 10
4 AHOC Lady and the BAMF -99 4/3 Div 1 15 5

In [106]:
winloss = scores.copy()

In [107]:
winloss['Team A Wins'] = winloss['Score A'] > winloss['Score B']

In [108]:
winloss.head(20)


Out[108]:
Team A Team B Tourney Qualifying games* divname Score A Score B Team A Wins
0 AHOC Gothrilla -99 4/3 Div 1 15 12 True
1 AHOC BBN -99 4/3 Div 1 15 1 True
2 AHOC Stonecutters -99 4/3 Div 1 15 8 True
3 AHOC FlowChart -99 4/3 Div 1 15 10 True
4 AHOC Lady and the BAMF -99 4/3 Div 1 15 5 True
5 AHOC Swingers -99 4/3 Div 1 15 11 True
6 AHOC Upstream -99 4/3 Div 1 15 10 True
7 AHOC Tubbs -99 4/3 Div 1 15 6 True
8 AHOC Stonecutters -99 4/3 Div 1 15 10 True
9 AHOC Zerg Rush! -99 4/3 Div 1 15 8 True
10 AHOC Turtle Boy -99 4/3 Div 1 15 6 True
11 AHOC Live Poultry, Fresh Killed (LPFK) -99 4/3 Div 1 15 7 True
12 AHOC TuneSquad -99 4/3 Div 1 15 4 True
13 AHOC JuJu Hex -99 4/3 Div 1 15 12 True
14 Gothrilla AHOC -99 4/3 Div 1 12 15 False
15 Gothrilla Swingers -99 4/3 Div 1 9 13 False
16 Gothrilla FlowChart -99 4/3 Div 1 15 7 True
17 Gothrilla Hucky McThrowandcatch -99 4/3 Div 1 14 10 True
18 Gothrilla SnakeCountryBromance -99 4/3 Div 1 10 15 False
19 Gothrilla Zerg Rush! -99 4/3 Div 1 11 15 False

In [109]:
winloss = winloss.drop(['Tourney Qualifying games*', 'Score A', 'Score B'], axis=1)

In [110]:
winloss.head(20)


Out[110]:
Team A Team B divname Team A Wins
0 AHOC Gothrilla 4/3 Div 1 True
1 AHOC BBN 4/3 Div 1 True
2 AHOC Stonecutters 4/3 Div 1 True
3 AHOC FlowChart 4/3 Div 1 True
4 AHOC Lady and the BAMF 4/3 Div 1 True
5 AHOC Swingers 4/3 Div 1 True
6 AHOC Upstream 4/3 Div 1 True
7 AHOC Tubbs 4/3 Div 1 True
8 AHOC Stonecutters 4/3 Div 1 True
9 AHOC Zerg Rush! 4/3 Div 1 True
10 AHOC Turtle Boy 4/3 Div 1 True
11 AHOC Live Poultry, Fresh Killed (LPFK) 4/3 Div 1 True
12 AHOC TuneSquad 4/3 Div 1 True
13 AHOC JuJu Hex 4/3 Div 1 True
14 Gothrilla AHOC 4/3 Div 1 False
15 Gothrilla Swingers 4/3 Div 1 False
16 Gothrilla FlowChart 4/3 Div 1 True
17 Gothrilla Hucky McThrowandcatch 4/3 Div 1 True
18 Gothrilla SnakeCountryBromance 4/3 Div 1 False
19 Gothrilla Zerg Rush! 4/3 Div 1 False

In [111]:
teams = winloss['Team A'].unique()

In [112]:
team2index = {}
team2div = {}
for i, team in enumerate(teams):
    row = winloss['Team A'] == team
    div_team = winloss.loc[row, 'divname'].unique()[0]
    team2div[team] = div_team
    team2index[team] = i

In [113]:
winloss['Index A'] = winloss['Team A'].apply(lambda x: team2index[x])
winloss['Index B'] = winloss['Team B'].apply(lambda x: team2index[x])
winloss['Div A'] = winloss['Team A'].apply(lambda x: team2div[x])
winloss['Div B'] = winloss['Team B'].apply(lambda x: team2div[x])

In [114]:
mixed_winloss = winloss.copy()
for div in ['Open Div 1', 'Open Div 2']:
    sub1 = (mixed_winloss['Div A'] == div) | (mixed_winloss['Div B'] == div)
    mixed_winloss = mixed_winloss[~sub1]

In [115]:
mixed_winloss.shape


Out[115]:
(1044, 8)

In [116]:
mixed_winloss.to_csv(os.path.join(interim_dir, 'winloss_with_duplicates.csv'), index=None)

In [90]:
pair_list = []
for row in mixed_winloss.index:
    team_A = mixed_winloss.loc[row, 'Index A']
    team_B = mixed_winloss.loc[row, 'Index B']
    new_pair = (team_A, team_B)
    pair_list.append(new_pair)
    reverse_pair = (team_B, team_A)
    if reverse_pair in pair_list:
        mixed_winloss = mixed_winloss.drop(row)

In [91]:
mixed_winloss.shape


Out[91]:
(522, 13)

In [92]:
mixed_winloss.head()


Out[92]:
Team A Team B Tourney Qualifying games* divname Score A Score B Team A Wins Team A Loses Team A Ties Index A Index B Div A Div B
0 AHOC Gothrilla -99 4/3 Div 1 15 12 True False False 0 1 4/3 Div 1 4/3 Div 1
1 AHOC BBN -99 4/3 Div 1 15 1 True False False 0 9 4/3 Div 1 4/3 Div 1
2 AHOC Stonecutters -99 4/3 Div 1 15 8 True False False 0 41 4/3 Div 1 5/2 Div 1
3 AHOC FlowChart -99 4/3 Div 1 15 10 True False False 0 2 4/3 Div 1 4/3 Div 1
4 AHOC Lady and the BAMF -99 4/3 Div 1 15 5 True False False 0 28 4/3 Div 1 4/3 Div 2

In [93]:
mixed_winloss_flatprior = mixed_winloss[['Team A', 'Team B', 'Team A Wins', 'Index A', 'Index B']]

In [97]:
outmwlfppath = os.path.join(interim_dir, 'winloss_simpleprior_{}.csv'.format(league_id))
mixed_winloss_flatprior.to_csv(outmwlfppath, index=None)

In [95]:
mixed_winloss_divprior = mixed_winloss[['Team A', 'Team B', 'Team A Wins', 'Index A', 'Index B', 'Div A', 'Div B']]

In [98]:
outmwldppath = os.path.join(interim_dir, 'winloss_divprior_{}.csv'.format(league_id))
mixed_winloss_divprior.to_csv(outmwldppath, index=None)

In [60]:
outwlpath = os.path.join(interim_dir, 'winloss_{}.csv'.format(league_id))
winloss.to_csv(outwlpath, index=None)

In [15]:
scorediv = scores[['Team A', 'divname']].drop_duplicates()

In [17]:
scorediv = scorediv.set_index('Team A')

In [21]:
scorediv.loc['AHOC', 'divname']


Out[21]:
'4/3 Div 1'

In [ ]:
map2div = {}
teams = scores['Team A'].unique()
for team in teams:
    index =

In [38]:
heuristicdf = scores.groupby('Team A').sum().sort_index().reset_index()

In [39]:
heuristicdf.head()


Out[39]:
Team A Tourney Qualifying games* Score A Score B
0 123 Trap! -990 61 142
1 215 Needham Street -693 98 62
2 A Lil Bit Sticky -1485 151 194
3 AHOC -1386 210 110
4 Alpha No Beta -1881 216 223

In [40]:
heuristicdf = heuristicdf.assign(PlusMinus = heuristicdf['Score A'] - heuristicdf['Score B'])

In [41]:
heuristicdf.head()


Out[41]:
Team A Tourney Qualifying games* Score A Score B PlusMinus
0 123 Trap! -990 61 142 -81
1 215 Needham Street -693 98 62 36
2 A Lil Bit Sticky -1485 151 194 -43
3 AHOC -1386 210 110 100
4 Alpha No Beta -1881 216 223 -7

In [42]:
scorediv.loc[heuristicdf['Team A'].values, 'divname'].values


Out[42]:
array(['4/3 Div 3', 'Open Div 1', '4/3 Div 2', '4/3 Div 1', '5/2 Div 3',
       '4/3 Div 1', '5/2 Div 2', '4/3 Div 3', '5/2 Div 2', '4/3 Div 3',
       '4/3 Div 2', 'Open Div 1', 'Open Div 1', '5/2 Div 3', 'Open Div 1',
       '4/3 Div 2', '4/3 Div 2', 'Open Div 1', '4/3 Div 2', '4/3 Div 2',
       '4/3 Div 1', '5/2 Div 3', '5/2 Div 2', '4/3 Div 1', '5/2 Div 3',
       '4/3 Div 1', 'Open Div 1', '5/2 Div 2', '4/3 Div 2', '5/2 Div 2',
       'Open Div 2', '4/3 Div 2', '5/2 Div 2', '4/3 Div 2', '4/3 Div 2',
       '5/2 Div 3', '5/2 Div 2', '5/2 Div 3', '4/3 Div 2', '4/3 Div 2',
       '4/3 Div 1', '5/2 Div 3', '5/2 Div 2', '4/3 Div 2', '4/3 Div 2',
       '4/3 Div 3', '5/2 Div 2', '4/3 Div 2', '5/2 Div 3', '4/3 Div 2',
       '4/3 Div 2', '5/2 Div 2', '5/2 Div 3', 'Open Div 1', '5/2 Div 1',
       'Open Div 2', '5/2 Div 1', '5/2 Div 3', '5/2 Div 3', 'Open Div 1',
       '4/3 Div 3', 'Open Div 1', '5/2 Div 1', '4/3 Div 2', '4/3 Div 1',
       '5/2 Div 3', '5/2 Div 2', '4/3 Div 2', '5/2 Div 3', '4/3 Div 3',
       '4/3 Div 3', '4/3 Div 2', '4/3 Div 1', '4/3 Div 2', '4/3 Div 1',
       '4/3 Div 3', '4/3 Div 2', '4/3 Div 1'], dtype=object)

In [43]:
heuristicdf = heuristicdf.assign(divname = scorediv.loc[heuristicdf['Team A'].values, 'divname'].values)

In [67]:
tmpwl = scores.copy()

In [68]:
tmpwl['Team A Wins'] = (tmpwl['Score A'] > tmpwl['Score B']).astype('int')
tmpwl['Team A Loses'] = (tmpwl['Score A'] < tmpwl['Score B']).astype('int')
tmpwl['Team A Ties'] = (tmpwl['Score A'] == tmpwl['Score B']).astype('int')

In [73]:
wlrecord = tmpwl.groupby('Team A').sum()

In [74]:
wlrecord.head(20)


Out[74]:
Tourney Qualifying games* Score A Score B Team A Wins Team A Loses Team A Ties
Team A
123 Trap! -990 61 142 1 9 0
215 Needham Street -693 98 62 5 1 1
A Lil Bit Sticky -1485 151 194 5 10 0
AHOC -1386 210 110 14 0 0
Alpha No Beta -1881 216 223 10 9 0
BBN -1089 111 158 2 8 1
Baba Yaga -1188 170 123 9 3 0
Baboon Heart -1188 108 143 5 7 0
Bacon -1089 143 125 7 4 0
Batman and the Robins -1188 140 143 6 6 0
Booooooosh! -891 116 116 4 5 0
Crossroads -2079 299 226 17 3 1
DISCtinguished Gentlemen -693 75 101 1 6 0
Demons -1089 140 128 5 6 0
Desert Horizon -297 28 46 0 3 0
Disc Envy -1683 218 187 9 8 0
Dominion -1287 147 153 6 7 0
Fat Bass Ultimate -990 138 127 6 4 0
Fetch -1881 225 216 8 11 0
Flaming Croissants -1980 271 205 15 5 0

In [77]:
heuristicdf = heuristicdf.set_index('Team Name').join(wlrecord)

In [79]:
heuristicdf = heuristicdf.drop(['Tourney Qualifying games*', 'Score A', 'Score B'], axis=1)

In [82]:
heuristicdf = heuristicdf.rename(columns={'Team A': 'Team Name', 
                                          'Team A Wins': 'Wins', 
                                          'Team A Loses': 'Losses', 
                                          'Team A Ties': 'Ties'})

In [83]:
heuristicdf.head()


Out[83]:
PlusMinus divname Wins Losses Ties
Team Name
123 Trap! -81 4/3 Div 3 1 9 0
215 Needham Street 36 Open Div 1 5 1 1
A Lil Bit Sticky -43 4/3 Div 2 5 10 0
AHOC 100 4/3 Div 1 14 0 0
Alpha No Beta -7 5/2 Div 3 10 9 0

In [84]:
outhuepath = os.path.join(interim_dir, 'plusminus_{}.csv'.format(league_id))
heuristicdf.to_csv(outhuepath)

In [ ]: